# coding:utf-8
from pyspark import SparkConf, SparkContext
import os

# point PySpark's backing JVM at the local JDK install (adjust the path for your environment)
os.environ['JAVA_HOME'] = '/server/jdk'

if __name__ == '__main__':
    conf = SparkConf().setAppName('test').setMaster('local[*]')
    sc = SparkContext(conf=conf)

    rdd = sc.parallelize([('a', 1), ('a', 1), ('b', 1), ('b', 1), ('a', 1)])
    # reduceByKey: merge the values of each key with the given function (here, summation).
    # It only applies to KV-style RDDs, i.e. RDDs of 2-tuples.
    print(rdd.reduceByKey(lambda a, b: a + b).collect())
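
    # For comparison, a sketch of the same aggregation via groupByKey + mapValues.
    # reduceByKey is usually preferred in practice: it pre-combines values on the
    # map side before shuffling, while groupByKey ships every record across the network.
    print(rdd.groupByKey().mapValues(lambda vals: sum(vals)).collect())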


    # mapValues: apply a function to the value of each pair, leaving the key unchanged.
    print(rdd.mapValues(lambda x: x * 10).collect())
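
    # Equivalent sketch using a plain map over the whole (key, value) tuple;
    # mapValues is just the more concise form when only the value changes.
    print(rdd.map(lambda kv: (kv[0], kv[1] * 10)).collect())

    # release resources once the job is done
    sc.stop()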